In [7]:
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import plotly.subplots as sp
from plotly.subplots import make_subplots
import plotly.graph_objects as go
In [3]:
# Load the diamonds dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='diamonds.csv')
# Bare trailing expression so the notebook renders the freshly loaded frame.
df
Out[3]:
Unnamed: 0 carat cut color clarity depth table price x y z
0 1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
... ... ... ... ... ... ... ... ... ... ... ...
53935 53936 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50
53936 53937 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
53937 53938 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
53938 53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
53939 53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64

53940 rows × 11 columns

In [4]:
# Remove the leftover index column that the CSV carried in as "Unnamed: 0".
# Rebinding `df` (instead of `inplace=True`) together with errors="ignore"
# keeps this cell safe to re-run: a second execution is a no-op rather than a
# KeyError. The redundant `axis=1` is gone because `columns=` already names
# the axis being dropped from.
df = df.drop(columns="Unnamed: 0", errors="ignore")
df
Out[4]:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
... ... ... ... ... ... ... ... ... ... ...
53935 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50
53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61
53937 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56
53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74
53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64

53940 rows × 10 columns

In [14]:
# Count rows that exactly repeat an earlier row; the first occurrence of each
# repeated row is not flagged, only the later copies are.
# NOTE(review): the duplicates are only counted here; none of the cells shown
# afterwards removes them -- confirm that is intended.
df.duplicated(keep='first').sum()
Out[14]:
146
In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
# The recorded output for this cell shows a minimum of 0 for x, y and z and
# maxima of 58.9 (y) and 31.8 (z), far above the 75% quartiles.
# NOTE(review): those look like invalid or outlier measurements -- verify.
df.describe(percentiles=[0.25, 0.5, 0.75])
Out[15]:
carat depth table price x y z
count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000
mean 0.797940 61.749405 57.457184 3932.799722 5.731157 5.734526 3.538734
std 0.474011 1.432621 2.234491 3989.439738 1.121761 1.142135 0.705699
min 0.200000 43.000000 43.000000 326.000000 0.000000 0.000000 0.000000
25% 0.400000 61.000000 56.000000 950.000000 4.710000 4.720000 2.910000
50% 0.700000 61.800000 57.000000 2401.000000 5.700000 5.710000 3.530000
75% 1.040000 62.500000 59.000000 5324.250000 6.540000 6.540000 4.040000
max 5.010000 79.000000 95.000000 18823.000000 10.740000 58.900000 31.800000
In [5]:
# Price against carat for the full dataset, with points coloured by cut.
sns.scatterplot(data=df, x='carat', y='price', hue='cut')
Out[5]:
<Axes: xlabel='carat', ylabel='price'>
In [19]:
# Grid of pairwise relationship plots across the frame's variables.
sns.pairplot(data=df)
Out[19]:
<seaborn.axisgrid.PairGrid at 0x17a58940850>
In [20]:
# Every diamond's price plotted against its cut category (full dataset).
sns.scatterplot(data=df, x='cut', y='price')
Out[20]:
<Axes: xlabel='cut', ylabel='price'>
In [21]:
# One bar per cut category; bar height is the aggregated price for that cut
# (seaborn's default estimator, the mean, since none is passed here).
sns.barplot(data=df, x='cut', y='price')
Out[21]:
<Axes: xlabel='cut', ylabel='price'>
In [22]:
# Keep only the first 50 rows as a small working subset.
# NOTE(review): in the recorded output these rows all have prices between 326
# and 404, so the subset is not representative of the full price range --
# confirm that taking the leading rows (rather than a random sample) is intended.
df50 = df.iloc[:50]
# Trailing expression renders the 50-row subset.
df50
Out[22]:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
5 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48
6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47
7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53
8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49
9 0.23 Very Good H VS1 59.4 61.0 338 4.00 4.05 2.39
10 0.30 Good J SI1 64.0 55.0 339 4.25 4.28 2.73
11 0.23 Ideal J VS1 62.8 56.0 340 3.93 3.90 2.46
12 0.22 Premium F SI1 60.4 61.0 342 3.88 3.84 2.33
13 0.31 Ideal J SI2 62.2 54.0 344 4.35 4.37 2.71
14 0.20 Premium E SI2 60.2 62.0 345 3.79 3.75 2.27
15 0.32 Premium E I1 60.9 58.0 345 4.38 4.42 2.68
16 0.30 Ideal I SI2 62.0 54.0 348 4.31 4.34 2.68
17 0.30 Good J SI1 63.4 54.0 351 4.23 4.29 2.70
18 0.30 Good J SI1 63.8 56.0 351 4.23 4.26 2.71
19 0.30 Very Good J SI1 62.7 59.0 351 4.21 4.27 2.66
20 0.30 Good I SI2 63.3 56.0 351 4.26 4.30 2.71
21 0.23 Very Good E VS2 63.8 55.0 352 3.85 3.92 2.48
22 0.23 Very Good H VS1 61.0 57.0 353 3.94 3.96 2.41
23 0.31 Very Good J SI1 59.4 62.0 353 4.39 4.43 2.62
24 0.31 Very Good J SI1 58.1 62.0 353 4.44 4.47 2.59
25 0.23 Very Good G VVS2 60.4 58.0 354 3.97 4.01 2.41
26 0.24 Premium I VS1 62.5 57.0 355 3.97 3.94 2.47
27 0.30 Very Good J VS2 62.2 57.0 357 4.28 4.30 2.67
28 0.23 Very Good D VS2 60.5 61.0 357 3.96 3.97 2.40
29 0.23 Very Good F VS1 60.9 57.0 357 3.96 3.99 2.42
30 0.23 Very Good F VS1 60.0 57.0 402 4.00 4.03 2.41
31 0.23 Very Good F VS1 59.8 57.0 402 4.04 4.06 2.42
32 0.23 Very Good E VS1 60.7 59.0 402 3.97 4.01 2.42
33 0.23 Very Good E VS1 59.5 58.0 402 4.01 4.06 2.40
34 0.23 Very Good D VS1 61.9 58.0 402 3.92 3.96 2.44
35 0.23 Good F VS1 58.2 59.0 402 4.06 4.08 2.37
36 0.23 Good E VS1 64.1 59.0 402 3.83 3.85 2.46
37 0.31 Good H SI1 64.0 54.0 402 4.29 4.31 2.75
38 0.26 Very Good D VS2 60.8 59.0 403 4.13 4.16 2.52
39 0.33 Ideal I SI2 61.8 55.0 403 4.49 4.51 2.78
40 0.33 Ideal I SI2 61.2 56.0 403 4.49 4.50 2.75
41 0.33 Ideal J SI1 61.1 56.0 403 4.49 4.55 2.76
42 0.26 Good D VS2 65.2 56.0 403 3.99 4.02 2.61
43 0.26 Good D VS1 58.4 63.0 403 4.19 4.24 2.46
44 0.32 Good H SI2 63.1 56.0 403 4.34 4.37 2.75
45 0.29 Premium F SI1 62.4 58.0 403 4.24 4.26 2.65
46 0.32 Very Good H SI2 61.8 55.0 403 4.35 4.42 2.71
47 0.32 Good H SI2 63.8 56.0 403 4.36 4.38 2.79
48 0.25 Very Good E VS2 63.3 60.0 404 4.00 4.03 2.54
49 0.29 Very Good H SI2 60.7 60.0 404 4.33 4.37 2.64
In [24]:
# Same cut-vs-price scatter as before, restricted to the 50-row subset.
sns.scatterplot(data=df50, x='cut', y='price')
Out[24]:
<Axes: xlabel='cut', ylabel='price'>
In [31]:
# Pairwise Pearson correlation matrix of the selected numeric columns.
# Despite its name, `df_num` holds the correlation matrix, not the raw columns.
# NOTE(review): `depth` is numeric as well but is not part of this selection
# -- confirm the omission is deliberate.
df_num = (
    df.loc[:, ['carat', 'table', 'x', 'y', 'z', 'price']]
    .corr(method='pearson')
)
In [32]:
# Heatmap of the correlation matrix, with each cell annotated by its value.
sns.heatmap(data=df_num, annot=True)
Out[32]:
<Axes: >
In [6]:
# Price against carat for the full dataset, with points coloured by the
# diamond's colour grade.
sns.scatterplot(data=df, x='carat', y='price', hue='color')
Out[6]:
<Axes: xlabel='carat', ylabel='price'>
In [12]:
# Number of diamonds in each colour grade.
sns.countplot(data=df, x='color')
Out[12]:
<Axes: xlabel='color', ylabel='count'>
In [13]:
# Number of diamonds in each cut category.
sns.countplot(data=df, x='cut')
Out[13]:
<Axes: xlabel='cut', ylabel='count'>
In [16]:
# Interactive 3D scatter: carat on x, cut category on y, price on z, with
# points coloured by cut. `labels` supplies the display names for the axes.
scatter_3d_fig = px.scatter_3d(
    data_frame=df,
    x='carat',
    y='cut',
    z='price',
    color='cut',
    labels={'carat': 'Carat', 'cut': 'Cut Quality', 'price': 'Price'},
    title='3D Scatter Plot of Carat, Cut, and Price',
)
scatter_3d_fig.show()
In [18]:
# Interactive 3D scatter: carat on x, colour grade on y, price on z, with
# points coloured by colour grade. `labels` supplies the axis display names.
# The title now names "Color": it was previously copied from the cut-based
# figure and said "Cut", which did not match what this figure plots.
scatter_3d_fig = px.scatter_3d(
    df,
    x='carat',
    y='color',
    z='price',
    color='color',
    title='3D Scatter Plot of Carat, Color, and Price',
    labels={'carat': 'Carat', 'color': 'Color', 'price': 'Price'},
)
scatter_3d_fig.show()
In [19]:
# Animated carat-vs-price scatter with one animation frame per cut category,
# points coloured by cut.
# The axis ranges are pinned to the extent of the whole dataset (plus 5%
# headroom) so that every frame stays in view; without fixed ranges, frames
# after the first can fall partly outside the axes chosen for the first frame.
scatter_fig = px.scatter(
    df,
    x='carat',
    y='price',
    animation_frame='cut',
    color='cut',
    range_x=[0, float(df['carat'].max()) * 1.05],
    range_y=[0, float(df['price'].max()) * 1.05],
    title='Animated Scatter Plot: Carat vs Price Colored by Cut',
)
scatter_fig.show()
In [ ]: